BAP treated hESC (day 8) snRNAseq data analyses (v2)

snRNAseq data analyses

Prerequisites

This experiment has 2 conditions (BAP treated hESC exposed to 20% oxygen and 5% oxygen), with 2 replicates each. The details are provided in the manuscript. The packages that are needed for the analyses were loaded as below. If you need the version information, session information is printed at the bottom of this wiki.

setwd("/work/LAS/geetu-lab/arnstrm/timecourse/5_cr-count.o2stress/version_1")
# load the modules
library(Seurat)
#> Attaching SeuratObject
library(knitr)
library(kableExtra)
library(ggplot2)
library(cowplot)
library(patchwork)
#> 
#> Attaching package: 'patchwork'
#> The following object is masked from 'package:cowplot':
#> 
#>     align_plots
library(metap)
library(multtest)
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#>     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#>     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#>     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#>     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(gridExtra)
#> 
#> Attaching package: 'gridExtra'
#> The following object is masked from 'package:Biobase':
#> 
#>     combine
#> The following object is masked from 'package:BiocGenerics':
#> 
#>     combine
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:gridExtra':
#> 
#>     combine
#> The following object is masked from 'package:Biobase':
#> 
#>     combine
#> The following objects are masked from 'package:BiocGenerics':
#> 
#>     combine, intersect, setdiff, union
#> The following object is masked from 'package:kableExtra':
#> 
#>     group_rows
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(stringr)
library(TissueEnrich)
#> Loading required package: ensurer
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: 'matrixStats'
#> The following object is masked from 'package:dplyr':
#> 
#>     count
#> The following objects are masked from 'package:Biobase':
#> 
#>     anyMissing, rowMedians
#> 
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> The following object is masked from 'package:Biobase':
#> 
#>     rowMedians
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: S4Vectors
#> 
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:dplyr':
#> 
#>     first, rename
#> The following objects are masked from 'package:base':
#> 
#>     expand.grid, I, unname
#> Loading required package: IRanges
#> 
#> Attaching package: 'IRanges'
#> The following objects are masked from 'package:dplyr':
#> 
#>     collapse, desc, slice
#> Loading required package: GenomeInfoDb
#> 
#> Attaching package: 'SummarizedExperiment'
#> The following object is masked from 'package:SeuratObject':
#> 
#>     Assays
#> The following object is masked from 'package:Seurat':
#> 
#>     Assays
#> Loading required package: GSEABase
#> Loading required package: annotate
#> Loading required package: AnnotationDbi
#> 
#> Attaching package: 'AnnotationDbi'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> Loading required package: XML
#> Loading required package: graph
#> 
#> Attaching package: 'graph'
#> The following object is masked from 'package:XML':
#> 
#>     addNode
#> The following object is masked from 'package:stringr':
#> 
#>     boundary
library(gprofiler2)
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✓ tibble  3.1.6     ✓ purrr   0.3.4
#> ✓ tidyr   1.2.0     ✓ forcats 0.5.1
#> ✓ readr   2.1.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x graph::boundary()        masks stringr::boundary()
#> x IRanges::collapse()      masks dplyr::collapse()
#> x dplyr::combine()         masks gridExtra::combine(), Biobase::combine(), BiocGenerics::combine()
#> x matrixStats::count()     masks dplyr::count()
#> x IRanges::desc()          masks dplyr::desc()
#> x tidyr::expand()          masks S4Vectors::expand()
#> x dplyr::filter()          masks stats::filter()
#> x S4Vectors::first()       masks dplyr::first()
#> x dplyr::group_rows()      masks kableExtra::group_rows()
#> x dplyr::lag()             masks stats::lag()
#> x BiocGenerics::Position() masks ggplot2::Position(), base::Position()
#> x purrr::reduce()          masks GenomicRanges::reduce(), IRanges::reduce()
#> x S4Vectors::rename()      masks dplyr::rename()
#> x AnnotationDbi::select()  masks dplyr::select()
#> x IRanges::slice()         masks dplyr::slice()
library(enhancedDimPlot)
library(calibrate)
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:AnnotationDbi':
#> 
#>     select
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> The following object is masked from 'package:patchwork':
#> 
#>     area
library(ggrepel)
library(dittoSeq)
library(ComplexHeatmap)
#> Loading required package: grid
#> ========================================
#> ComplexHeatmap version 2.10.0
#> Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
#> Github page: https://github.com/jokergoo/ComplexHeatmap
#> Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
#> 
#> If you use it in published research, please cite:
#> Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
#>   genomic data. Bioinformatics 2016.
#> 
#> The new InteractiveComplexHeatmap package can directly export static 
#> complex heatmaps into an interactive Shiny app with zero effort. Have a try!
#> 
#> This message can be suppressed by:
#>   suppressPackageStartupMessages(library(ComplexHeatmap))
#> ========================================
library(scales)
#> 
#> Attaching package: 'scales'
#> The following object is masked from 'package:purrr':
#> 
#>     discard
#> The following object is masked from 'package:readr':
#> 
#>     col_factor
library(ggvenn)
library(plotly)
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ComplexHeatmap':
#> 
#>     add_heatmap
#> The following object is masked from 'package:MASS':
#> 
#>     select
#> The following object is masked from 'package:AnnotationDbi':
#> 
#>     select
#> The following object is masked from 'package:IRanges':
#> 
#>     slice
#> The following object is masked from 'package:S4Vectors':
#> 
#>     rename
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout
library(DT)
#> 
#> Attaching package: 'DT'
#> The following object is masked from 'package:SeuratObject':
#> 
#>     JS
#> The following object is masked from 'package:Seurat':
#> 
#>     JS
library(ape)
#> 
#> Attaching package: 'ape'
#> The following objects are masked from 'package:graph':
#> 
#>     complement, degree, edges
library(enrichR)
#> Welcome to enrichR
#> Checking connection ...
#> Enrichr ... Connection is Live!
#> FlyEnrichr ... Connection is available!
#> WormEnrichr ... Connection is available!
#> YeastEnrichr ... Connection is available!
#> FishEnrichr ... Connection is available!
library(SeuratWrappers)

Importing 10x Datasets

The 10X data was already processed with CellRanger (v.4.0.0) and the counts table was ready for us to import for the data analyses. We used the inbuilt function to import this data and to create a Seurat object as described below.


experiment_name = "BAP"
dataset_loc <- "/work/LAS/geetu-lab/arnstrm/timecourse/1_data/input_v6.1.2/with_introns"
ids <- c("5pcO2_r1", "5pcO2_r2", "20pcO2_r1", "20pcO2_r2")
# function d10x.data
d10x.data <- sapply(ids, function(i){
  d10x <- Read10X(file.path(dataset_loc,i,"filtered_feature_bc_matrix"))
  colnames(d10x) <- paste(sapply(strsplit(colnames(d10x),split="-"), '[[' , 1L ), i, sep="-")
  d10x
})

experiment.data <- do.call("cbind", d10x.data)
bapd8.combined <- CreateSeuratObject(
  experiment.data,
  project = "BAPd8",
  min.cells = 10,
  min.genes = 200,
  names.field = 2,
  names.delim = "\\-")

# backup the object
bapd8.temp <- bapd8.combined

Data quality insepction

After the data was imported, we checked the quality of the data. Mitochondrial expression is an important criteria (along with other quantitative features of each nuclei) to decide if the nucleus is good or bad. We tested it as follows

MT ratio in nucleus

bapd8.combined$log10GenesPerUMI <- log10(bapd8.combined$nFeature_RNA) / log10(bapd8.combined$nCount_RNA)
bapd8.combined$mitoRatio <- PercentageFeatureSet(object = bapd8.combined, pattern = "^MT-")
bapd8.combined$mitoRatio <- bapd8.combined@meta.data$mitoRatio / 100
metadata <- bapd8.combined@meta.data
metadata$cells <- rownames(metadata)
metadata <- metadata %>%
  dplyr::rename(seq_folder = orig.ident,
                nUMI = nCount_RNA,
                nGene = nFeature_RNA,
                seq_folder = orig.ident)

p <- ggplot(dat = metadata, aes(x=nUMI, y=nGene, color=mitoRatio)) +
  geom_point(alpha = 0.5) +
  scale_colour_gradient(low = "gray90", high = "black") + labs(colour="MT ratio") +
  theme_bw(base_size = 12) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    panel.border = element_rect(colour = "black")) +
  xlab("RNA counts") + ylab("Gene counts") +
  stat_smooth(method=lm) +
  facet_wrap(~seq_folder, labeller = labeller(seq_folder =
                                                c("20pcO2_r1" = "20% Oxygen (rep1)",
                                                  "20pcO2_r2" = "20% Oxygen (rep2)",
                                                  "5pcO2_r1" = "5% Oxygen (rep1)",
                                                  "5pcO2_r2" = "5% Oxygen (rep2)"))) +
  scale_y_continuous(label=comma) +
  scale_x_continuous(label=comma)

ggplotly(p)
#> `geom_smooth()` using formula 'y ~ x'

Relationship between the total molecules detected (transcripts) vs. total genes detected across samples. Each nucleus is represented as a dot, with the color intensity representing the mitochondrial read ratio in that nucleus.

Number of nuclei per sample

ggplot(metadata, aes(x=seq_folder, fill=seq_folder)) +
  geom_bar() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust=0.5, face="bold")) +
  ggtitle("Number of Nuclei")
Number of nuclei found in each sample

Number of nuclei found in each sample

Density of nuclei per sample

ggplot(metadata, aes(color=seq_folder, x=nUMI, fill= seq_folder)) +
  geom_density(alpha = 0.2) +
  scale_x_log10() +
  theme_classic() +
  ylab("Cell density") +
  geom_vline(xintercept = 500)
Density of nuclei vs. UMIs

Density of nuclei vs. UMIs

Number of Nuclei vs. genes

ggplot(metadata, aes(x=seq_folder, y=log10(nGene), fill=seq_folder)) +
  geom_boxplot() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust=0.5, face="bold")) +
  ggtitle("NNuclei vs NGenes")
Nubmer of nuclei vs. genes

Nubmer of nuclei vs. genes

Number of Nuclei vs. genes

ggplot(metadata, aes(x=log10GenesPerUMI, color = seq_folder, fill=seq_folder)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  geom_vline(xintercept = 0.8)
Nubmer of Nuclei vs. genes

Nubmer of Nuclei vs. genes

Mitochondrial density across samples

ggplot(metadata, aes(color=seq_folder, x=mitoRatio, fill=seq_folder)) +
  geom_density(alpha = 0.2) +
  scale_x_log10() +
  theme_classic() +
  geom_vline(xintercept = 0.2)
#> Warning: Transformation introduced infinite values in continuous x-axis
#> Warning: Removed 17 rows containing non-finite values (stat_density).
Mitochondrial density across samples

Mitochondrial density across samples

Data filtering

After inspection, we decided to remove all mitochondiral genes as well as ribosomal genes from our analyses.

set up the metadata file and organize

bapd8.combined <- bapd8.temp
df <- bapd8.combined@meta.data
df$replicate <- NA
df$replicate[which(str_detect(df$orig.ident, "5pcO2"))] <- "5pcO2"
df$replicate[which(str_detect(df$orig.ident, "20pcO2"))] <- "20pcO2"
bapd8.combined@meta.data <- df
bapd8.combined[["percent.mt"]] <- PercentageFeatureSet(bapd8.combined, pattern = "^MT-")
datatable(bapd8.combined@meta.data, rownames = TRUE, filter="top", options = list(pageLength = 15, scrollX=T) )

set up the metadata file and organize

v1 <- VlnPlot(bapd8.combined, features = "nFeature_RNA", pt.size = 1) +
  geom_hline(yintercept=200, color = "red", size=1) +
  geom_hline(yintercept=7500, color = "red", size=1) +
  theme(legend.position = "none")
v2 <- VlnPlot(bapd8.combined, features = "nCount_RNA", pt.size = 1) +
  theme(legend.position = "none")
v3 <- VlnPlot(bapd8.combined, features = "percent.mt", pt.size = 1) +
  geom_hline(yintercept=15, color = "red", size=1) +
  theme(legend.position = "none")
v1 | v2 | v3

Before filtering

B1 <- FeatureScatter(bapd8.combined, feature1 = "nCount_RNA", feature2 = "percent.mt")
B2 <- FeatureScatter(bapd8.combined, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
B1 | B2

Preliminary filtering

bapd8.combined <- subset(bapd8.combined, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 25)
I1 <- FeatureScatter(bapd8.combined, feature1 = "nCount_RNA", feature2 = "percent.mt")
I2 <- FeatureScatter(bapd8.combined, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

I1 | I2

Final filtering

bapd8.combined <- subset(bapd8.combined, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 15)
A1 <- FeatureScatter(bapd8.combined, feature1 = "nCount_RNA", feature2 = "percent.mt")
A2 <- FeatureScatter(bapd8.combined, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
A1 | A2

Removing ribosomal and MT proteins

counts <- GetAssayData(object = bapd8.combined, slot = "counts")
counts <- counts[grep(pattern = "^MT", x = rownames(counts), invert = TRUE),]
counts <- counts[grep(pattern = "^MT", x = rownames(counts), invert = TRUE),]
counts <- counts[grep(pattern = "^RPL", x = rownames(counts), invert = TRUE),]
counts <- counts[grep(pattern = "^RPS", x = rownames(counts), invert = TRUE),]
counts <- counts[grep(pattern = "^MRPS", x = rownames(counts), invert = TRUE),]
counts <- counts[grep(pattern = "^MRPL", x = rownames(counts), invert = TRUE),]
keep_genes <- Matrix::rowSums(counts) >= 10
filtered_counts <- counts[keep_genes, ]
bapd8.fcombined <- CreateSeuratObject(filtered_counts, meta.data = bapd8.combined@meta.data)
bapd8.fcombined@meta.data <- bapd8.fcombined@meta.data[1:4]
bapd8.combined <- bapd8.fcombined

Data integration and Clustering

Seurat package was used for integrating samples and running the snRNAseq analyses.

data integration

set.seed(1111)
bapd8.list <- SplitObject(bapd8.combined, split.by = "orig.ident")
bapd8.list <- lapply(X = bapd8.list, FUN = function(x) {
  x <- NormalizeData(x)
  x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

Seurat

bapd8.anchors <- FindIntegrationAnchors(object.list = bapd8.list, dims = 1:20)
#> Computing 2000 integration features
#> Scaling features for provided objects
#> Finding all pairwise anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 2945 anchors
#> Filtering anchors
#>  Retained 2531 anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 4571 anchors
#> Filtering anchors
#>  Retained 2721 anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 3619 anchors
#> Filtering anchors
#>  Retained 2070 anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 3814 anchors
#> Filtering anchors
#>  Retained 2652 anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 3227 anchors
#> Filtering anchors
#>  Retained 2259 anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 5013 anchors
#> Filtering anchors
#>  Retained 3810 anchors
bapd8.integrated <- IntegrateData(anchorset = bapd8.anchors, dims = 1:20)
#> Merging dataset 2 into 1
#> Extracting anchors for merged samples
#> Finding integration vectors
#> Finding integration vector weights
#> Integrating data
#> Merging dataset 4 into 3
#> Extracting anchors for merged samples
#> Finding integration vectors
#> Finding integration vector weights
#> Integrating data
#> Merging dataset 1 2 into 3 4
#> Extracting anchors for merged samples
#> Finding integration vectors
#> Finding integration vector weights
#> Integrating data
DefaultAssay(bapd8.integrated) <- "integrated"
bapd8.integrated <- ScaleData(bapd8.integrated, verbose = FALSE)
bapd8.integrated <- RunPCA(bapd8.integrated, npcs = 30, verbose = FALSE)
bapd8.integrated <- RunUMAP(bapd8.integrated, reduction = "pca", dims = 1:20)
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 17:25:05 UMAP embedding parameters a = 0.9922 b = 1.112
#> 17:25:05 Read 6428 rows and found 20 numeric columns
#> 17:25:05 Using Annoy for neighbor search, n_neighbors = 30
#> 17:25:05 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 17:25:06 Writing NN index file to temp file /tmp/Rtmptux5Nm/file2721276d1959b
#> 17:25:06 Searching Annoy index using 1 thread, search_k = 3000
#> 17:25:08 Annoy recall = 100%
#> 17:25:08 Commencing smooth kNN distance calibration using 1 thread
#> 17:25:10 Initializing from normalized Laplacian + noise
#> 17:25:10 Commencing optimization for 500 epochs, with 279604 positive edges
#> 17:25:17 Optimization finished
bapd8.integrated <- FindNeighbors(bapd8.integrated, reduction = "pca", dims = 1:20)
#> Computing nearest neighbor graph
#> Computing SNN
bapd8.integrated <- FindClusters(bapd8.integrated, resolution = 0.5)
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 6428
#> Number of edges: 254094
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8858
#> Number of communities: 11
#> Elapsed time: 0 seconds

Renumber the clusters

By default Seurat assigns the cluster identity starting from zero. Since we prefer identity starting from one, we renumbered cluster 0-10 to 1-11.

num.clusters <- nlevels(bapd8.integrated$seurat_clusters)
df <- bapd8.integrated@meta.data
df$new_clusters <- as.factor(as.numeric(df$seurat_clusters))
bapd8.integrated@meta.data <- df
Idents(bapd8.integrated) <- "new_clusters"

dimplots (colored based on clusters)

The Dimensional reduction plot was plotted using the Seurat DipPlot function, with colors representing different groups/clusters.

d1 <- enhancedDimPlot(object = bapd8.integrated, grouping_var = 'ident', reduction = "umap", label = TRUE, pt.size = 1, alpha = 0.5) +
  ggtitle("A") + xlab("UMAP_1") + ylab("UMAP_2") +
  theme_classic() +
  theme(legend.position = "none", plot.title = element_text(face = "bold"))
ggplotly(d1)
#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[11L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

Fig2A: Dimensional reduction plot showing 5,355 nuclei plotted in two dimensions. The colored dots represent individual nuclei and are assigned based on cluster identity

dimplots (colored based on conditions)

d2 <- enhancedDimPlot(object = bapd8.integrated, grouping_var = 'replicate', reduction = "umap", label = FALSE, pt.size = 1, alpha = 0.4) +
  ggtitle("B") +
  xlab("UMAP_1") +
  ylab("UMAP_2") +
  theme_classic() +
  theme(legend.justification = c(1, 1), legend.position = c(1, 1), plot.title = element_text(face = "bold")) +
  scale_colour_manual(name = "Conditions",
                      labels = c(expression(paste('20% ', 'O'[2])), expression(paste('5% ', 'O'[2]))),
                      values = c("20pcO2" = "#0571b0", "5pcO2" = "#ca0020")) +
  scale_fill_manual(name = "Conditions",
                    labels = c(expression(paste('20% ', 'O'[2])), expression(paste('5% ', 'O'[2]))),
                    values = c("20pcO2" = "#0571b0", "5pcO2" = "#ca0020")) +
  scale_linetype_manual(values = "blank")
ggplotly(d2)

Fig2B: Dimensional reduction plot showing 5,355 nuclei plotted in two dimensions. The colored dots represent individual nuclei and are assigned based on treatment.

dimplots (colored based on samples)


d3 <- enhancedDimPlot(object = bapd8.integrated, grouping_var = 'orig.ident', reduction = "umap", label = FALSE, pt.size = 1, alpha = 0.4) +
  ggtitle("C") +
  xlab("UMAP_1") +
  ylab("UMAP_2") +
  theme_classic() +
  theme(legend.justification = c(1, 1), legend.position = c(1, 1), plot.title = element_text(face = "bold")) +
  scale_colour_manual(name = "Replicates",
                      labels = c(expression(paste('20% ', 'O'[2], ' rep1')), expression(paste('20% ', 'O'[2], ' rep2')), expression(paste('5% ', 'O'[2], ' rep1')), expression(paste('5% ', 'O'[2], ' rep1'))),
                      values = c("20pcO2_r1" = "#0571b0", "20pcO2_r2" = "#92c5de", "5pcO2_r1" = "#ca0020", "5pcO2_r2" = "#f4a582")) +
  scale_fill_manual(name = "Replicates",
                    labels = c(expression(paste('20% ', 'O'[2], ' rep1')), expression(paste('20% ', 'O'[2], ' rep2')), expression(paste('5% ', 'O'[2], ' rep1')), expression(paste('5% ', 'O'[2], ' rep1'))),
                    values = c("20pcO2_r1" = "#0571b0", "20pcO2_r2" = "#92c5de", "5pcO2_r1" = "#ca0020", "5pcO2_r2" = "#f4a582")) +
  scale_linetype_manual(values = "blank")

ggplotly(d3)

Fig2C: Dimensional reduction plot showing 5,355 nuclei plotted in two dimensions. The colored dots represent individual nuclei and are assigned based on replicate

Find markers

Find markers for each cluster. The Seurat command FindMarkers was run using the cluster identity assigned in the previous step. Additional column with the Fold (converted from natural log fold change of seurat output) was added to the table. The filtering was done for genes with avg_FC >= 1.5 and p_val_adj <= 0.05.

DefaultAssay(bapd8.integrated) <- "RNA"
lhs.a  <- paste("markers.all.", 1:num.clusters, sep="")
rhs.a <- paste("FindMarkers(bapd8.integrated, ident.1 = ",1:num.clusters," )", sep="")
commands.a <- paste(paste(lhs.a, rhs.a, sep="<-"), collapse=";")
eval(parse(text=commands.a))
#markers.all.1$avg_FC <-   2^markers.all.1$avg_log2FC
lhs.b  <- paste("markers.all.", 1:num.clusters, "$avg_FC", sep="")
rhs.b <- paste("2^markers.all.",1:num.clusters,"$avg_log2FC", sep="")
commands.b <- paste(paste(lhs.b, rhs.b, sep="<-"), collapse=";")
eval(parse(text=commands.b))
lhs.c  <- paste("markers.filtered.", 1:num.clusters, sep="")
rhs.c <- paste("markers.all.",1:num.clusters," %>% filter(avg_FC >= 1.5) %>% filter(p_val_adj <= 0.05) %>% arrange(desc(avg_FC))", sep="")
commands.c <- paste(paste(lhs.c, rhs.c, sep="<-"), collapse=";")
eval(parse(text=commands.c))
lhs.d  <- paste("markers.filtered.names.", 1:num.clusters, sep="")
rhs.d <- paste("rownames(markers.filtered.",1:num.clusters,")", sep="")
commands.d <- paste(paste(lhs.d, rhs.d, sep="<-"), collapse=";")
eval(parse(text=commands.d))

Check the number of markers

message (paste("Cluster 1 as", length(markers.filtered.names.1), "markers", sep = " "))
#> Cluster 1 as 627 markers
message (paste("Cluster 2 as", length(markers.filtered.names.2), "markers", sep = " "))
#> Cluster 2 as 449 markers
message (paste("Cluster 3 as", length(markers.filtered.names.3), "markers", sep = " "))
#> Cluster 3 as 319 markers
message (paste("Cluster 4 as", length(markers.filtered.names.4), "markers", sep = " "))
#> Cluster 4 as 682 markers
message (paste("Cluster 5 as", length(markers.filtered.names.5), "markers", sep = " "))
#> Cluster 5 as 405 markers
message (paste("Cluster 6 as", length(markers.filtered.names.6), "markers", sep = " "))
#> Cluster 6 as 34 markers
message (paste("Cluster 7 as", length(markers.filtered.names.7), "markers", sep = " "))
#> Cluster 7 as 371 markers
message (paste("Cluster 8 as", length(markers.filtered.names.8), "markers", sep = " "))
#> Cluster 8 as 399 markers
message (paste("Cluster 9 as", length(markers.filtered.names.9), "markers", sep = " "))
#> Cluster 9 as 426 markers
message (paste("Cluster 10 as", length(markers.filtered.names.10), "markers", sep = " "))
#> Cluster 10 as 457 markers
message (paste("Cluster 11 as", length(markers.filtered.names.11), "markers", sep = " "))
#> Cluster 11 as 656 markers

Marker plots

Combined expression of all the markers genes in various clusters. The markers have higher expression in their respecitve cluster than compared to the rest of the clusters.

ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}

color_list <- ggplotColours(n=11)

grouped_violinPlots <- function(markersfile, clusternumber, seuratobject = bapd8.integrated) {
  dittoPlotVarsAcrossGroups(seuratobject, markersfile,
                            group.by = "new_clusters", main = paste("Cluster ", clusternumber, " markers"),
                            xlab = "Clusters",
                            ylab = "Mean z-score expression",
                            x.labels = c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4",
                                         "Cluster 5", "Cluster 6", "Cluster 7", "Cluster 8",
                                         "Cluster 9", "Cluster 10", "Cluster 11"),
                            vlnplot.lineweight = 0.5,
                            legend.show = FALSE,
                            jitter.size = 0.5,
                            color.panel = color_list)
}

Markers of cluster 1

grouped_violinPlots(markers.filtered.names.1, 1)

Markers of cluster 2

grouped_violinPlots(markers.filtered.names.2, 2)

Markers of cluster 3

grouped_violinPlots(markers.filtered.names.3, 3)

Markers of cluster 4

grouped_violinPlots(markers.filtered.names.4, 4)

Markers of cluster 5

grouped_violinPlots(markers.filtered.names.5, 5)

Markers of cluster 6

grouped_violinPlots(markers.filtered.names.6, 6)

Markers of cluster 7

grouped_violinPlots(markers.filtered.names.7, 7)

Markers of cluster 8

grouped_violinPlots(markers.filtered.names.8, 8)

Markers of cluster 9

grouped_violinPlots(markers.filtered.names.9, 9)

Markers of cluster 10

grouped_violinPlots(markers.filtered.names.10, 10)

Markers of cluster 11

grouped_violinPlots(markers.filtered.names.11, 11)

Run PlacentaCellEnrich on markers

PlacentaCellEnrich was run command-line using the TissueEnrich R package. We used this to assign cell identity to cluster 2, 3, 5 and 6. The function used for running PCE is as follows:

# Vento-Tormo et al., dataset
input="/work/LAS/geetu-lab/arnstrm/timecourse/1_data/input_v6.1.2/other.info/"
l <-
  load(file = paste0(input, "combine-test-expression1.Rdata"))
humanGeneMapping <- dataset$GRCH38$humanGeneMapping
d <- dataset$PlacentaDeciduaBloodData
data <- d$expressionData
cellDetails <- d$cellDetails

# Xiang et al., dataset
te.dataset.xiang <- readRDS(paste0(input, "te.dataset.xiang.rds"))

# Castel et al., dataset
te.dataset.castel <- readRDS(paste0(input, "te.dataset.castel.rds"))

# full names for cell types
xi.md <-
  read.csv(
    paste0(input, "/md-xi.tsv"),
    sep = "\t",
    header = TRUE,
    row.names = 1
  )
vt.md <-
  read.csv(
    paste0(input, "md-vt.tsv"),
    sep = "\t",
    header = TRUE,
    row.names = 1
  )
zp.md <-
  read.csv(
    paste0(input, "md-zp.tsv"),
    sep = "\t",
    header = TRUE,
    row.names = 1
  )

runpce <- function(inputgenelist, barcolor) {
  inputGenes <- toupper(inputgenelist)
  gs1 <- GeneSet(geneIds = toupper(inputgenelist))
  humanGene <-
    humanGeneMapping[humanGeneMapping$Gene.name %in% inputGenes, ]
  inputGenes <- humanGene$Gene
  expressionData <-
    data[intersect(row.names(data), humanGeneMapping$Gene), ]
  se <-
    SummarizedExperiment(
      assays = SimpleList(as.matrix(expressionData)),
      rowData = row.names(expressionData),
      colData = colnames(expressionData)
    )
  cellSpecificGenesExp <-
    teGeneRetrieval(se, expressedGeneThreshold = 1)
  gs <- GeneSet(geneIds = toupper(inputGenes))
  output.vt <- teEnrichmentCustom(gs, cellSpecificGenesExp)
  en.output.vt <-
    setNames(data.frame(assay(output.vt[[1]]), row.names = rowData(output.vt[[1]])[, 1]),
             colData(output.vt[[1]])[, 1])
  row.names(cellDetails) <- cellDetails$RName
  en.output.vt$Tissue <-
    cellDetails[row.names(en.output.vt), "CellName"]
  en.output.vt$source <- "VT"
  en.output.vt <- en.output.vt[order(-en.output.vt$Log10PValue),]
  en.output.vt <-
    merge(en.output.vt, vt.md, by = "row.names", all.x = TRUE)
  en.output.vt <- rownames_to_column(en.output.vt, var = "Name")
  output.xi <- teEnrichmentCustom(gs1, te.dataset.xiang)
  en.output.xi <-
    setNames(data.frame(assay(output.xi[[1]]), row.names = rowData(output.xi[[1]])[, 1]),
             colData(output.xi[[1]])[, 1])
  en.output.xi$Tissue <- rownames(en.output.xi)
  en.output.xi$source <- "Xi"
  en.output.xi <- en.output.xi[order(-en.output.xi$Log10PValue),]
  en.output.xi <-
    merge(en.output.xi, xi.md, by = "row.names", all.x = TRUE)
  en.output.xi <- rownames_to_column(en.output.xi, var = "Name")
  output.zp <- teEnrichmentCustom(gs1, te.dataset.castel)
  en.output.zp <-
    setNames(data.frame(assay(output.zp[[1]]), row.names = rowData(output.zp[[1]])[, 1]),
             colData(output.zp[[1]])[, 1])
  en.output.zp$Tissue <- rownames(en.output.zp)
  en.output.zp$source <- "ZP"
  en.output.zp <- en.output.zp[order(-en.output.zp$Log10PValue),]
  en.output.zp <-
    merge(en.output.zp, zp.md, by = "row.names", all.x = TRUE)
  en.output.zp <- rownames_to_column(en.output.zp, var = "Name")
  en.conbined <- rbind(en.output.vt, en.output.xi, en.output.zp)
  
  #  p <- 0.05
  #  logp <- -log10(p)
  en.conbined <-  en.conbined %>%
    #    mutate(Log10PValue = replace(Log10PValue, Log10PValue < logp, 0))
    #  en.conbined %>%
    group_by(source) %>%
    arrange(source, desc(Log10PValue)) %>% dplyr::slice(1:7)  %>%
    ungroup %>%
    mutate(
      source = as.factor(source),
      CellNames = tidytext::reorder_within(CellNames, Log10PValue, source, sep = ":")
    )
  ggplot(en.conbined, aes(CellNames, Log10PValue)) + geom_bar(stat = 'identity', fill = barcolor) +  theme_minimal() +
    theme(
      axis.text.x = element_text(
        vjust = 1,
        hjust = 1,
        size = 10
      ),
      axis.text.y = element_text(size = 8),
      plot.margin = margin(10, 10, 10, 100),
      legend.position = "none",
      plot.title = element_text(
        color = "black",
        size = 10,
        face = "bold.italic"
      ),
      axis.title.y = element_blank(),
      axis.line.x = element_line(
        colour = 'black',
        size = 0.5,
        linetype = 'solid'
      ),
      axis.ticks.x = element_line(
        colour = 'black',
        size = 1,
        linetype = 'solid'
      ),
      axis.title.x = element_text(
        color = "black",
        size = 10,
        face = "bold"
      )
    )  +
    scale_y_continuous(expand = expansion(mult = c(0, .1)), breaks = pretty_breaks()) + ylab("-log10 p-value") +
    facet_wrap( ~ source, scales = "free", ncol = 3) +
    coord_flip()
    
}

PCE for cluster 1 markers

runpce(markers.filtered.names.1, color_list[1])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 2 markers

runpce(markers.filtered.names.2, color_list[2])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 3 markers

runpce(markers.filtered.names.3, color_list[3])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 4 markers

runpce(markers.filtered.names.4, color_list[4])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 5 markers

runpce(markers.filtered.names.5, color_list[5])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 6 markers

runpce(markers.filtered.names.6, color_list[6])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 7 markers

runpce(markers.filtered.names.7, color_list[7])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 8 markers

runpce(markers.filtered.names.8, color_list[8])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 9 markers

runpce(markers.filtered.names.9, color_list[9])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

runpce(markers.filtered.names.10, color_list[10])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

runpce(markers.filtered.names.11, color_list[11])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

Inspecting STB clusters (cluster 4 and 8)

# findmarkers
markers.all.4vs8 <- FindMarkers(bapd8.integrated, ident.1 = 4, ident.2 = 8)
markers.all.8vs4 <- FindMarkers(bapd8.integrated, ident.1 = 8, ident.2 = 4)
#convert FC
markers.all.4vs8$avg_FC <- 2^markers.all.4vs8$avg_log2FC
markers.all.8vs4$avg_FC <- 2^markers.all.8vs4$avg_log2FC
#Filter
markers.filtered.4vs8 <- markers.all.4vs8 %>% filter(avg_FC >= 1.5) %>% filter(p_val_adj <= 0.05)
markers.filtered.8vs4 <- markers.all.8vs4 %>% filter(avg_FC >= 1.5) %>% filter(p_val_adj <= 0.05)
#list
markers.filtered.names.4vs8 <- rownames(markers.filtered.4vs8)
markers.filtered.names.8vs4 <- rownames(markers.filtered.8vs4)

PCE for cluster 4 markers (vs. 8)

runpce(markers.filtered.names.4vs8, color_list[4])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

PCE for cluster 8 markers (vs. 4)

runpce(markers.filtered.names.4vs8, color_list[5])
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.
#> No background list provided. Using all the
#>                 genes as background.

Plotting functions

Some plotting functions that we used for finalizing violin plots.

# https://divingintogeneticsandgenomics.rbind.io/post/stacked-violin-plot-for-visualizing-single-cell-data-in-seurat/
modify_vlnplot<- function(obj,
                          feature,
                          pt.size = 0,
                          plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
                          ...) {
  p<- VlnPlot(obj, features = feature, pt.size = pt.size, ... )  +
    xlab("") + ylab(feature) + ggtitle("") +
    theme(legend.position = "none",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.y = element_text(size = rel(1), angle = 0),
          axis.text.y = element_text(size = rel(1)),
          plot.margin = plot.margin )
  return(p)
}

extract_max<- function(p){
  ymax<- max(ggplot_build(p)$layout$panel_scales_y[[1]]$range$range)
  return(ceiling(ymax))
}


StackedVlnPlot<- function(obj, features,
                          pt.size = 0,
                          plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
                          ...) {

  plot_list<- purrr::map(features, function(x) modify_vlnplot(obj = obj,feature = x, ...))

  # Add back x-axis title to bottom plot. patchwork is going to support this?
  plot_list[[length(plot_list)]]<- plot_list[[length(plot_list)]] +
    theme(axis.text.x=element_text(), axis.ticks.x = element_line())

  # change the y-axis tick to only max value
  ymaxs<- purrr::map_dbl(plot_list, extract_max)
  plot_list<- purrr::map2(plot_list, ymaxs, function(x,y) x +
                            scale_y_continuous(breaks = c(y)) +
                            expand_limits(y = y))

  p<- patchwork::wrap_plots(plotlist = plot_list, ncol = 1)
  return(p)
}

Find all Markers

Find all markers using the in build function of Seurat

bapd8.markers <- FindAllMarkers(bapd8.integrated, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#> Calculating cluster 1
#> Calculating cluster 2
#> Calculating cluster 3
#> Calculating cluster 4
#> Calculating cluster 5
#> Calculating cluster 6
#> Calculating cluster 7
#> Calculating cluster 8
#> Calculating cluster 9
#> Calculating cluster 10
#> Calculating cluster 11
bapd8.markers.ranked.10.percluster <- bapd8.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
datatable(bapd8.markers.ranked.10.percluster, rownames = FALSE, filter="top", options = list(pageLength = 10, scrollX=T))

Find Conserved markers

This is optional. We did not use the marker genes specific for clusters of each condition.

lhs.f  <- paste("markers.conserved.", 1:num.clusters, sep="")
rhs.f <- paste("FindConservedMarkers(bapd8.integrated, ident.1 = ",1:num.clusters,', grouping.var = "replicate", verbose = FALSE)', sep="")
commands.f <- paste(paste(lhs.f, rhs.f, sep="<-"), collapse=";")
eval(parse(text=commands.f))

Expression tables

To export average expression levels for all genes, summarized based on all cells, each condition and each replicate, we used AverageExpression command for Seurat.

cluster.averages.data <- AverageExpression(bapd8.integrated, slot = "data", assays = "RNA")
condition.averages.data <- AverageExpression(bapd8.integrated, slot = "data", group.by = "orig.ident", assays = "RNA")
replicate.averages.data <- AverageExpression(bapd8.integrated, slot = "data", group.by = "replicate", assays = "RNA")
avg.data <- cluster.averages.data[["RNA"]]
avg.condition <- condition.averages.data[["RNA"]]
avg.replicate <- replicate.averages.data[["RNA"]]
write.table(avg.data, file="snn-average-data.tsv", sep= "\t")
write.table(avg.condition, file="snn-average-condition.tsv", sep= "\t")
write.table(avg.replicate, file="snn-average-replicate.tsv", sep= "\t")
#avg.data$gene <- row.names(avg.data)
#avg.data <- as_tibble(avg.data)
#head(avg.data)
#avg.data <- avg.data[, c(12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)]
#colnames(avg.data) <- c("Gene", paste("Cluster", colnames(avg.data[2:12]), sep = "_"))
#datatable(avg.data, rownames = FALSE, filter="top", options = list(pageLength = 10, scrollX=T))

Cell numbers per clusters

Generate a summary table showing the number of cells in each cluster, broken down by replicates and treatment.

cells <- bapd8.integrated@meta.data %>%
  dplyr::group_by(orig.ident,   new_clusters, replicate)    %>%
  dplyr::summarise(length(new_clusters)) %>%
  dplyr::rename(sample = orig.ident,
                cluster = new_clusters,
                condition = replicate,
                number.of.cells = `length(new_clusters)`)
#> `summarise()` has grouped output by 'orig.ident', 'new_clusters'. You can
#> override using the `.groups` argument.

#head(cells)
#datatable(cells, rownames = TRUE, filter="top", options = list(pageLength = 10, scrollX=T))
#cells %>%
 # group_by(orig.ident, new_clusters ) %>%
 # summarize(`length(new_clusters)`)

ggplot(cells, aes(x = cluster, y = number.of.cells, fill = cluster )) +
  geom_col() +
  facet_wrap(~condition) +
  theme_classic() +
  theme(legend.position = "none")

DE between conditions and Volcano plots

The DE was carried out between the conditions for each cluster. First we modified the metadata to create a separate column that has both the condition as well as the cluster number and then we used Seurat’s FindMarkers to find the genes that are differentially expressed. The genes that have log2FC > 1 or <-1 are shown in color if they also have p-val <0.05.

head(bapd8.integrated@meta.data)
#>                           orig.ident nCount_RNA nFeature_RNA replicate
#> AAACGAAGTGAGACGT-5pcO2_r1   5pcO2_r1        921          687     5pcO2
#> AAACGCTAGCCGATAG-5pcO2_r1   5pcO2_r1       1786         1053     5pcO2
#> AAAGAACAGGCACTAG-5pcO2_r1   5pcO2_r1       1091          573     5pcO2
#> AAAGAACTCATTTCCA-5pcO2_r1   5pcO2_r1      21054         4986     5pcO2
#> AAAGGATTCCGTAGTA-5pcO2_r1   5pcO2_r1       9718         3540     5pcO2
#> AAAGGTAGTAGGGAGG-5pcO2_r1   5pcO2_r1      18482         5317     5pcO2
#>                           integrated_snn_res.0.5 seurat_clusters new_clusters
#> AAACGAAGTGAGACGT-5pcO2_r1                      2               2            3
#> AAACGCTAGCCGATAG-5pcO2_r1                      0               0            1
#> AAAGAACAGGCACTAG-5pcO2_r1                      2               2            3
#> AAAGAACTCATTTCCA-5pcO2_r1                      4               4            5
#> AAAGGATTCCGTAGTA-5pcO2_r1                      8               8            9
#> AAAGGTAGTAGGGAGG-5pcO2_r1                      2               2            3
df <- bapd8.integrated@meta.data
df$stim <- (paste(df$replicate,df$new_clusters, sep = "."))
df$stim <- gsub('5pcO2', 'FIVE', df$stim)
df$stim <- gsub('20pcO2', 'TWENTY', df$stim)
bapd8.integrated@meta.data <- df
Idents(bapd8.integrated) <- "stim"
lhs.g  <- paste("clus", 1:num.clusters, ".five.twenty", sep="")
rhs.g <- paste('FindMarkers(bapd8.integrated, ident.1 = "FIVE.', 1:num.clusters, '", ident.2 = "TWENTY.', 1:num.clusters, '", verbose = FALSE, logfc.threshold = 0)', sep="")
commands.g <- paste(paste(lhs.g, rhs.g, sep="<-"), collapse=";")
eval(parse(text=commands.g))
# lhs.h  <- paste("clus", 1:num.clusters, ".five.twenty$log2fc", sep="")
# rhs.h <- paste('log2(exp(clus', 1:num.clusters, '.five.twenty$avg_logFC))', sep="")
# commands.h <- paste(paste(lhs.h, rhs.h, sep="<-"), collapse=";")
# eval(parse(text=commands.h))
lhs.j  <- paste("clus", 1:num.clusters, ".five.twenty$Gene", sep="")
rhs.j <- paste('row.names(clus', 1:num.clusters, '.five.twenty)', sep="")
commands.j <- paste(paste(lhs.j, rhs.j, sep="<-"), collapse=";")
eval(parse(text=commands.j))
lhs.k  <- paste("clus", 1:num.clusters, '.five.twenty$diffexpressed <- "other genes"', sep="")
commands.k <- paste(lhs.k , sep=";")
eval(parse(text=commands.k))
lhs.l  <- paste('clus', 1:num.clusters,'.five.twenty$diffexpressed[clus',1:num.clusters,'.five.twenty$avg_log2FC >= 0.584962501 & clus', 1:num.clusters,'.five.twenty$p_val_adj <= 0.05] <- "up in 5% O2"', sep="")
commands.l <- paste(lhs.l , sep=";")
eval(parse(text=commands.l))
lhs.m  <- paste('clus', 1:num.clusters,'.five.twenty$diffexpressed[clus',1:num.clusters,'.five.twenty$avg_log2FC <= -0.584962501 & clus', 1:num.clusters,'.five.twenty$p_val_adj <= 0.05] <- "up in 20% O2"', sep="")
commands.m <- paste(lhs.m , sep=";")
eval(parse(text=commands.m))
lhs.n  <- paste('clus', 1:num.clusters,'.five.twenty$delabel <- ""', sep="")
commands.n <- paste(lhs.n , sep=";")
eval(parse(text=commands.n))
lhs.o  <- paste('clus', 1:num.clusters,'.five.twenty$delabel[clus', 1:num.clusters,'.five.twenty$avg_log2FC >= 0.584962501 & clus', 1:num.clusters,'.five.twenty$p_val_adj <= 0.05]', sep="")
rhs.o  <- paste('clus', 1:num.clusters,'.five.twenty$Gene[clus', 1:num.clusters,'.five.twenty$avg_log2FC >= 0.584962501 & clus', 1:num.clusters,'.five.twenty$p_val_adj <= 0.05]', sep="")
commands.o <- paste(paste(lhs.o, rhs.o, sep="<-"), collapse=";")
eval(parse(text=commands.o))
lhs.p  <- paste('clus', 1:num.clusters,'.five.twenty$delabel[clus', 1:num.clusters,'.five.twenty$avg_log2FC <= -0.584962501 & clus', 1:num.clusters,'.five.twenty$p_val_adj <= 0.05]', sep="")
rhs.p  <- paste('clus', 1:num.clusters,'.five.twenty$Gene[clus', 1:num.clusters,'.five.twenty$avg_log2FC <= -0.584962501 & clus', 1:num.clusters,'.five.twenty$p_val_adj <= 0.05]', sep="")
commands.p <- paste(paste(lhs.p, rhs.p, sep="<-"), collapse=";")
eval(parse(text=commands.p))

Volcano Plot function: This plots allows us to visualize the genes that are overexpressed in each condition along with its p-value. First, we will setup a function to make a volcano plot and then we call them for each cluster and depict them as an interactive plot.


myVolcanoPlot <- function(mydf, clus.number) {
de <- ggplot(data=mydf, aes(x=avg_log2FC, y=-log10(p_val), col=diffexpressed, label=delabel)) +
  geom_point(alpha = 0.5) +
  theme_classic() +
  scale_color_manual(name = "Expression", values=c("#4d4d4d", "#ca0020", "#0571b0")) +
  ggtitle(paste("Cluster ", clus.number, ": 20% O2 vs. 5% O2")) +
  xlab("Log2 fold change") +
  ylab("-log10 pvalue (adjusted)") +
  theme(legend.text.align = 0)
return(de)
}

Volcano plot for cluster 1

ggplotly(myVolcanoPlot(clus1.five.twenty, 1))

Fig.6-1: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 1

Volcano plot for cluster 2

ggplotly(myVolcanoPlot(clus2.five.twenty, 2))

Fig.6-2: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 2

ggsave("Figure_6_A.svg", dpi=900, width = 8, height = 6)

Volcano plot for cluster 3

ggplotly(myVolcanoPlot(clus3.five.twenty, 3))

Fig.6-3: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 3

Volcano plot for cluster 4

ggplotly(myVolcanoPlot(clus4.five.twenty, 4))

Fig.6-4: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 4

Volcano plot for cluster 5

ggplotly(myVolcanoPlot(clus5.five.twenty, 5))

Fig.6-5: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 5

Volcano plot for cluster 6

ggplotly(myVolcanoPlot(clus6.five.twenty, 6))

Fig.6-6: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 6

Volcano plot for cluster 7

ggplotly(myVolcanoPlot(clus7.five.twenty, 7))

Fig.6-7: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 7

Volcano plot for cluster 8

ggplotly(myVolcanoPlot(clus8.five.twenty, 8))

Fig.6-8: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 8

Volcano plot for cluster 9

ggplotly(myVolcanoPlot(clus9.five.twenty, 9))

Fig.6-9: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 9

Volcano plot for cluster 10

ggplotly(myVolcanoPlot(clus10.five.twenty, 10))

Fig.6-10: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 10

Volcano plot for cluster 11

ggplotly(myVolcanoPlot(clus11.five.twenty, 11))

Fig.6-11: Interactive Volcano plot showing genes upregulated in 5% and 20% oxygen conditions for cluster 11

Figures from the publication

Prepare dataset for individual plots

DefaultAssay(bapd8.integrated) <- "RNA"
Idents(bapd8.integrated) <- "new_clusters"
cluster4n8 <- subset(bapd8.integrated, idents = c("4", "8"))
#cluster2356 <- subset(bapd8.integrated, idents = c("1", "3", "4", "8"))
#cluster2n3 <- subset(bapd8.integrated, idents = c("2", "3"))
Idents(cluster4n8) <- "new_clusters"
#Idents(cluster2356) <- "new_clusters"
#Idents(cluster2n3) <- "new_clusters"
#DefaultAssay(cluster2356) <- "RNA"
DefaultAssay(cluster4n8) <- "RNA"
#DefaultAssay(cluster2n3) <- "RNA"

Figure 3

Transcription factors

fig3a <- c("GATA3", "TFAP2A")
multi_dittoPlot(bapd8.integrated, vars = fig3a, group.by = "new_clusters",
                       vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 2, color.panel = color_list)
Fig3A: Violin plots showing expression (average log fold change) for genes encoding transcription factors

Fig3A: Violin plots showing expression (average log fold change) for genes encoding transcription factors

Structural proteins

fig3b <- c("KRT7", "KRT23")
multi_dittoPlot(bapd8.integrated, vars = fig3b, group.by = "new_clusters",
                       vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 2, color.panel = color_list)
Fig3B: Violin plots showing expression (average log fold change) for genes encoding Structural proteins

Fig3B: Violin plots showing expression (average log fold change) for genes encoding Structural proteins

Hormones

fig3c <- c("CGA", "PGF")
multi_dittoPlot(bapd8.integrated, vars = fig3c, group.by = "new_clusters",
                       vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 2, color.panel = color_list)
Fig3C: Violin plots showing expression (average log fold change) for genes encoding Hormones

Fig3C: Violin plots showing expression (average log fold change) for genes encoding Hormones

Transporters and Carcinoembryonic Antigen

fig3d <- c("SLC40A1", "XAGE2")
multi_dittoPlot(bapd8.integrated, vars = fig3d, group.by = "new_clusters",
                       vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 2, color.panel = color_list)
Fig3D: Violin plots showing expression (average log fold change) for genes encoding Transporters and Carcinoembryonic Antigen

Fig3D: Violin plots showing expression (average log fold change) for genes encoding Transporters and Carcinoembryonic Antigen

Enzymes

fig3e <- c("CYP11A1", "HSD3B1")
multi_dittoPlot(bapd8.integrated, vars = fig3e, group.by = "new_clusters",
                       vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 2, color.panel = color_list)
Fig3E: Violin plots showing expression (average log fold change) for genes encoding enzymes

Fig3E: Violin plots showing expression (average log fold change) for genes encoding enzymes

Long non-coding RNAs

fig3f <- c("MALAT1", "NEAT1")
multi_dittoPlot(bapd8.integrated, vars = fig3f, group.by = "new_clusters",
                       vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 2, color.panel = color_list)
Fig3F: Violin plots showing expression (average log fold change) for genes encoding lncRNAs

Fig3F: Violin plots showing expression (average log fold change) for genes encoding lncRNAs

Figure 5

inputGenes<-toupper(markers.filtered.names.4)
humanGene<-humanGeneMapping[humanGeneMapping$Gene.name %in% inputGenes,]
inputGenes<-humanGene$Gene
expressionData<-data[intersect(row.names(data),humanGeneMapping$Gene),]
se<-SummarizedExperiment(assays = SimpleList(as.matrix(expressionData)),rowData = row.names(expressionData),colData = colnames(expressionData))
cellSpecificGenesExp<-teGeneRetrieval(se,expressedGeneThreshold = 1)
se<-SummarizedExperiment(assays = SimpleList(as.matrix(expressionData)),rowData = row.names(expressionData),colData = colnames(expressionData))
cellSpecificGenesExp<-teGeneRetrieval(se,expressedGeneThreshold = 1)
print(length(inputGenes))
#> [1] 606
gs<-GeneSet(geneIds=toupper(inputGenes))
output2<-teEnrichmentCustom(gs,cellSpecificGenesExp)
#> No background list provided. Using all the
#>                 genes as background.
enrichmentOutput<-setNames(data.frame(assay(output2[[1]]),row.names = rowData(output2[[1]])[,1]),colData(output2[[1]])[,1])
row.names(cellDetails)<-cellDetails$RName
enrichmentOutput$Tissue<- cellDetails[row.names(enrichmentOutput),"CellName"]
sct.cluster4.genes <- as.data.frame(assay(output2[[2]][["SCT"]]))[1]
sct.cluster4.genes <- humanGeneMapping[humanGeneMapping$Gene %in% as.list(sct.cluster4.genes$Gene),]
sct.cluster4.genes <- sct.cluster4.genes$Gene.name
# PCE cluster 6
inputGenes<-toupper(markers.filtered.names.8)
humanGene<-humanGeneMapping[humanGeneMapping$Gene.name %in% inputGenes,]
inputGenes<-humanGene$Gene
expressionData<-data[intersect(row.names(data),humanGeneMapping$Gene),]
se<-SummarizedExperiment(assays = SimpleList(as.matrix(expressionData)),rowData = row.names(expressionData),colData = colnames(expressionData))
cellSpecificGenesExp<-teGeneRetrieval(se,expressedGeneThreshold = 1)
se<-SummarizedExperiment(assays = SimpleList(as.matrix(expressionData)),rowData = row.names(expressionData),colData = colnames(expressionData))
cellSpecificGenesExp<-teGeneRetrieval(se,expressedGeneThreshold = 1)
print(length(inputGenes))
#> [1] 437
gs<-GeneSet(geneIds=toupper(inputGenes))
output2<-teEnrichmentCustom(gs,cellSpecificGenesExp)
#> No background list provided. Using all the
#>                 genes as background.
enrichmentOutput<-setNames(data.frame(assay(output2[[1]]),row.names = rowData(output2[[1]])[,1]),colData(output2[[1]])[,1])
row.names(cellDetails)<-cellDetails$RName
enrichmentOutput$Tissue<- cellDetails[row.names(enrichmentOutput),"CellName"]
sct.cluster8.genes <- as.data.frame(assay(output2[[2]][["SCT"]]))[1]
sct.cluster8.genes <- humanGeneMapping[humanGeneMapping$Gene %in% as.list(sct.cluster8.genes$Gene),]
sct.cluster8.genes <- sct.cluster8.genes$Gene.name
x = list(sct.cluster4.genes, sct.cluster8.genes)
names(x) <- c("STB genes of Cluster 4","STB genes of Cluster 8")
ggvenn(
    x,
    fill_color = color_list[4:8],
    stroke_size = NA,
    set_name_size = 4,
    show_percentage = FALSE
)
#> Warning in sprintf("%d", n, 100 * n/sum(n)): one argument not used by format
#> '%d'
Fig5A: STB specific genes shared by clusters 5 and 6

Fig5A: STB specific genes shared by clusters 5 and 6


fig5a <- c("KRT8", "S100P", "XAGE2")
fig5b <- c("ERVV-1", "TBX3", "GRHL1")

multi_dittoPlot(cluster4n8, vars = fig5a, group.by = "new_clusters",
                vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 1, color.panel = c(color_list[5:6]))

multi_dittoPlot(cluster4n8, vars = fig5b, group.by = "new_clusters",
                vlnplot.lineweight = 0.2, jitter.size = 0.3, ncol = 1, color.panel = c(color_list[5:6]))
Fig5: STB specific genes shared by clusters 5 and 6. Some highly expressed STB specific genes show (A) higher expression in cluster 5 and (B) higher expression in cluster 6Fig5: STB specific genes shared by clusters 5 and 6. Some highly expressed STB specific genes show (A) higher expression in cluster 5 and (B) higher expression in cluster 6

Fig5: STB specific genes shared by clusters 5 and 6. Some highly expressed STB specific genes show (A) higher expression in cluster 5 and (B) higher expression in cluster 6

Save RDS file

Finally we will save the entire session data to an external file. This can be explored again by loading it in R in the future if there is any need.

saveRDS(bapd8.integrated, 'bapd8.integrated.rds')

Session Info

Complete session information

sessionInfo()
#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.2 LTS
#> 
#> Matrix products: default
#> BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] grid      stats4    stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#>  [1] SeuratWrappers_0.3.0        enrichR_3.0                
#>  [3] ape_5.6-1                   DT_0.20                    
#>  [5] plotly_4.10.0               ggvenn_0.1.9               
#>  [7] scales_1.1.1                ComplexHeatmap_2.10.0      
#>  [9] dittoSeq_1.6.0              ggrepel_0.9.1              
#> [11] calibrate_1.7.7             MASS_7.3-55                
#> [13] enhancedDimPlot_0.0.0.9100  forcats_0.5.1              
#> [15] purrr_0.3.4                 readr_2.1.2                
#> [17] tidyr_1.2.0                 tibble_3.1.6               
#> [19] tidyverse_1.3.1             gprofiler2_0.2.1           
#> [21] TissueEnrich_1.14.0         GSEABase_1.56.0            
#> [23] graph_1.72.0                annotate_1.72.0            
#> [25] XML_3.99-0.8                AnnotationDbi_1.56.2       
#> [27] SummarizedExperiment_1.24.0 GenomicRanges_1.46.1       
#> [29] GenomeInfoDb_1.30.1         IRanges_2.28.0             
#> [31] S4Vectors_0.32.3            MatrixGenerics_1.6.0       
#> [33] matrixStats_0.61.0          ensurer_1.1                
#> [35] stringr_1.4.0               dplyr_1.0.8                
#> [37] gridExtra_2.3               multtest_2.50.0            
#> [39] Biobase_2.54.0              BiocGenerics_0.40.0        
#> [41] metap_1.8                   patchwork_1.1.1            
#> [43] cowplot_1.1.1               ggplot2_3.3.5              
#> [45] kableExtra_1.3.4            knitr_1.37                 
#> [47] SeuratObject_4.0.4          Seurat_4.1.0               
#> 
#> loaded via a namespace (and not attached):
#>   [1] SnowballC_0.7.0             scattermore_0.8            
#>   [3] R.methodsS3_1.8.1           ragg_1.2.1                 
#>   [5] bit64_4.0.5                 R.utils_2.11.0             
#>   [7] irlba_2.3.5                 multcomp_1.4-18            
#>   [9] DelayedArray_0.20.0         data.table_1.14.2          
#>  [11] rpart_4.1.16                KEGGREST_1.34.0            
#>  [13] RCurl_1.98-1.6              doParallel_1.0.17          
#>  [15] generics_0.1.2              TH.data_1.1-0              
#>  [17] RSQLite_2.2.9               RANN_2.6.1                 
#>  [19] future_1.23.0               tokenizers_0.2.1           
#>  [21] bit_4.0.4                   tzdb_0.2.0                 
#>  [23] mutoss_0.1-12               spatstat.data_2.1-2        
#>  [25] webshot_0.5.2               xml2_1.3.3                 
#>  [27] lubridate_1.8.0             httpuv_1.6.5               
#>  [29] assertthat_0.2.1            xfun_0.29                  
#>  [31] hms_1.1.1                   jquerylib_0.1.4            
#>  [33] evaluate_0.14               promises_1.2.0.1           
#>  [35] fansi_1.0.2                 dbplyr_2.1.1               
#>  [37] readxl_1.3.1                igraph_1.2.11              
#>  [39] DBI_1.1.2                   tmvnsim_1.0-2              
#>  [41] htmlwidgets_1.5.4           spatstat.geom_2.3-2        
#>  [43] paletteer_1.4.0             ellipsis_0.3.2             
#>  [45] RSpectra_0.16-0             crosstalk_1.2.0            
#>  [47] backports_1.4.1             bookdown_0.24              
#>  [49] deldir_1.0-6                vctrs_0.3.8                
#>  [51] SingleCellExperiment_1.16.0 Cairo_1.5-14               
#>  [53] remotes_2.4.2               ROCR_1.0-11                
#>  [55] abind_1.4-5                 cachem_1.0.6               
#>  [57] withr_2.4.3                 sctransform_0.3.3          
#>  [59] goftest_1.2-3               mnormt_2.0.2               
#>  [61] svglite_2.1.0               cluster_2.1.2              
#>  [63] lazyeval_0.2.2              crayon_1.5.0               
#>  [65] labeling_0.4.2              pkgconfig_2.0.3            
#>  [67] qqconf_1.1.1                vipor_0.4.5                
#>  [69] nlme_3.1-155                rlang_1.0.1                
#>  [71] globals_0.14.0              lifecycle_1.0.1            
#>  [73] miniUI_0.1.1.1              sandwich_3.0-1             
#>  [75] mathjaxr_1.4-0              rsvd_1.0.5                 
#>  [77] modelr_0.1.8                tidytext_0.3.2             
#>  [79] ggrastr_1.0.1               cellranger_1.1.0           
#>  [81] polyclip_1.10-0             lmtest_0.9-39              
#>  [83] Matrix_1.4-0                zoo_1.8-9                  
#>  [85] beeswarm_0.4.0              reprex_2.0.1               
#>  [87] ggridges_0.5.3              GlobalOptions_0.1.2        
#>  [89] pheatmap_1.0.12             png_0.1-7                  
#>  [91] viridisLite_0.4.0           rjson_0.2.21               
#>  [93] bitops_1.0-7                R.oo_1.24.0                
#>  [95] KernSmooth_2.23-20          Biostrings_2.62.0          
#>  [97] blob_1.2.2                  shape_1.4.6                
#>  [99] parallelly_1.30.0           spatstat.random_2.1-0      
#> [101] memoise_2.0.1               magrittr_2.0.2             
#> [103] plyr_1.8.6                  ica_1.0-2                  
#> [105] zlibbioc_1.40.0             compiler_4.1.0             
#> [107] RColorBrewer_1.1-2          clue_0.3-60                
#> [109] plotrix_3.8-2               fitdistrplus_1.1-6         
#> [111] cli_3.2.0                   XVector_0.34.0             
#> [113] listenv_0.8.0               janeaustenr_0.1.5          
#> [115] pbapply_1.5-0               mgcv_1.8-38                
#> [117] tidyselect_1.1.1            stringi_1.7.6              
#> [119] textshaping_0.3.6           highr_0.9                  
#> [121] yaml_2.2.2                  sass_0.4.0                 
#> [123] tools_4.1.0                 future.apply_1.8.1         
#> [125] parallel_4.1.0              circlize_0.4.14            
#> [127] rstudioapi_0.13             foreach_1.5.2              
#> [129] farver_2.1.0                rmdformats_1.0.3           
#> [131] Rtsne_0.15                  BiocManager_1.30.16        
#> [133] digest_0.6.29               shiny_1.7.1                
#> [135] Rcpp_1.0.8                  broom_0.7.12               
#> [137] later_1.3.0                 RcppAnnoy_0.0.19           
#> [139] httr_1.4.2                  Rdpack_2.1.3               
#> [141] colorspace_2.0-2            rvest_1.0.2                
#> [143] fs_1.5.2                    tensor_1.5                 
#> [145] reticulate_1.24             splines_4.1.0              
#> [147] uwot_0.1.11                 rematch2_2.1.2             
#> [149] sn_2.0.1                    spatstat.utils_2.3-0       
#> [151] systemfonts_1.0.4           xtable_1.8-4               
#> [153] jsonlite_1.7.3              R6_2.5.1                   
#> [155] TFisher_0.2.0               pillar_1.7.0               
#> [157] htmltools_0.5.2             mime_0.12                  
#> [159] glue_1.6.1                  fastmap_1.1.0              
#> [161] codetools_0.2-18            mvtnorm_1.1-3              
#> [163] utf8_1.2.2                  lattice_0.20-45            
#> [165] bslib_0.3.1                 spatstat.sparse_2.1-0      
#> [167] numDeriv_2016.8-1.1         ggbeeswarm_0.6.0           
#> [169] curl_4.3.2                  leiden_0.3.9               
#> [171] limma_3.50.1                survival_3.2-13            
#> [173] rmarkdown_2.11              munsell_0.5.0              
#> [175] GetoptLong_1.0.5            GenomeInfoDbData_1.2.7     
#> [177] iterators_1.0.14            haven_2.4.3                
#> [179] reshape2_1.4.4              gtable_0.3.0               
#> [181] rbibutils_2.2.7             spatstat.core_2.4-0